library(robust)
Loading required package: fit.models

train <- train %>%
  filter(price > quantile(price, 0.01), price < quantile(price, 0.99)) %>%
  filter(floor_area > quantile(floor_area, 0.01), floor_area < quantile(floor_area, 0.99))

Let’s build a simple linear model with floor_area, num_rooms, category + is_business

summary(m1)

Call:
lm(formula = price ~ floor_area + rooms_num + category + is_business, 
    data = train)

Residuals:
    Min      1Q  Median      3Q     Max 
-965142 -106788  -28235   72750 1754924 

Coefficients:
                                 Estimate Std. Error t value Pr(>|t|)    
(Intercept)                    -164040.83    7491.88 -21.896  < 2e-16 ***
floor_area                        3912.99      45.52  85.954  < 2e-16 ***
rooms_num                        15644.40    1551.86  10.081  < 2e-16 ***
categoryMieszkanie na sprzedaż  240446.49    4646.16  51.752  < 2e-16 ***
is_business                      29132.25    4118.09   7.074 1.54e-12 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 203300 on 30665 degrees of freedom
  (31 observations deleted due to missingness)
Multiple R-squared:  0.3993,    Adjusted R-squared:  0.3992 
F-statistic:  5096 on 4 and 30665 DF,  p-value: < 2.2e-16

Plot1

\[ (y_i - \hat{y_i}, \hat{y}_i) \]

Now, we will calculate robust model with MASS::rlm

summary(m1)

Call:
lm(formula = price ~ floor_area + rooms_num + category + is_business, 
    data = train)

Residuals:
    Min      1Q  Median      3Q     Max 
-965142 -106788  -28235   72750 1754924 

Coefficients:
                                 Estimate Std. Error t value Pr(>|t|)    
(Intercept)                    -164040.83    7491.88 -21.896  < 2e-16 ***
floor_area                        3912.99      45.52  85.954  < 2e-16 ***
rooms_num                        15644.40    1551.86  10.081  < 2e-16 ***
categoryMieszkanie na sprzedaż  240446.49    4646.16  51.752  < 2e-16 ***
is_business                      29132.25    4118.09   7.074 1.54e-12 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 203300 on 30665 degrees of freedom
  (31 observations deleted due to missingness)
Multiple R-squared:  0.3993,    Adjusted R-squared:  0.3992 
F-statistic:  5096 on 4 and 30665 DF,  p-value: < 2.2e-16
sum(m2$w < 1) / NROW(m2$w)
[1] 0.2250408
plot(m2$y, m2$w, xlab = "Price", ylab = "Huber weight", 
     main = "Distribution of prices and Huber weights")


plot(m2$x[,"floor_area"], m2$w, xlab = "Floor area", ylab = "Huber weight", 
     main = "Distribution of floor area and Huber weights")

summary(m3)

Call: rlm(formula = price ~ floor_area + rooms_num + category + is_business, 
    data = train, psi = psi.bisquare, y.ret = TRUE)
Residuals:
    Min      1Q  Median      3Q     Max 
-804133  -79137   -2171   98069 1789765 

Coefficients:
                               Value       Std. Error  t value    
(Intercept)                    -41032.0523   5219.4403     -7.8614
floor_area                       2786.9866     31.7158     87.8738
rooms_num                       21215.1095   1081.1512     19.6227
categoryMieszkanie na sprzedaż 138958.6328   3236.8841     42.9298
is_business                     25870.1684   2868.9866      9.0172

Residual standard error: 129200 on 30665 degrees of freedom
  (31 observations deleted due to missingness)
sum(m3$w) / NROW(m3$w)
[1] 0.8637105
plot(m3$y, m3$w, xlab = "Price", ylab = "Huber weight", 
     main = "Distribution of prices and Huber weights")


plot(m3$x[,"floor_area"], m3$w, xlab = "Floor area", ylab = "Huber weight", 
     main = "Distribution of floor area and Huber weights")

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9Cmluc3RhbGwucGFja2FnZXMoInJvYnVzdCIpCmxpYnJhcnkoZGF0YS50YWJsZSkKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkobHVicmlkYXRlKQpsaWJyYXJ5KGxtdGVzdCkgICMjIApsaWJyYXJ5KE1BU1MpCmxpYnJhcnkocm9idXN0KQpgYGAKCmBgYHtyfQp0cmFpbiA8LSBmcmVhZCgifi9naXQvemJpb3J5L290b2RvbS90cmFpbi1zbWFsbDMuY3N2IiwgCiAgICAgICAgICAgICAgIGNvbC5uYW1lcyA9IGMoImlkIiwgImNyZWF0ZWRfYXRfZmlyc3QiLCAiY2F0ZWdvcnkiLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICJpc19idXNpbmVzcyIsICJkaXN0cmljdF9pZCIsICJjaXR5X2lkIiwgInJlZ2lvbl9pZCIsIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICJwYXJhbXMiLCAidGl0bGUiLCAicHJpY2UiKSkgJT4lCiAgbXV0YXRlKGZsb29yX2FyZWEgPSBzdHJfZXh0cmFjdChwYXJhbXMsICJtPD0+XFxkezEsfShcXC5cXGR7MSwyfSk/IiksCiAgICAgICAgIGZsb29yX2FyZWEgPSBzdHJfZXh0cmFjdChmbG9vcl9hcmVhLCAiXFxkezEsfShcXC5cXGR7MSwyfSk/IiksCiAgICAgICAgIGZsb29yX2FyZWEgPSBhcy5udW1lcmljKGZsb29yX2FyZWEpLAogICAgICAgICByb29tc19udW0gPSBzdHJfZXh0cmFjdChwYXJhbXMsICJyb29tc19udW08PT5cXGR7MSx9IiksCiAgICAgICAgIHJvb21zX251bSA9IHN0cl9leHRyYWN0KHJvb21zX251bSwgIlxcZHsxLH0iKSwKICAgICAgICAgcm9vbXNfbnVtID0gYXMubnVtZXJpYyhyb29tc19udW0pLGNyZWF0ZWRfYXRfZmlyc3QgPSB5bWRfaG1zKGNyZWF0ZWRfYXRfZmlyc3QpLAogICAgICAgICB5cSA9IHF1YXJ0ZXIoY3JlYXRlZF9hdF9maXJzdCwgd2l0aF95ZWFyID0gVCkpCgpoZWFkKHRyYWluKQpgYGAKCmBgYHtyfQpwbG90KHRyYWluJGZsb29yX2FyZWEsIHRyYWluJHByaWNlKQpgYGAKCmBgYHtyfQp0cmFpbiA8LSB0cmFpbiAlPiUKICBmaWx0ZXIocHJpY2UgPiBxdWFudGlsZShwcmljZSwgMC4wMSksIHByaWNlIDwgcXVhbnRpbGUocHJpY2UsIDAuOTkpKSAlPiUKICBmaWx0ZXIoZmxvb3JfYXJlYSA+IHF1YW50aWxlKGZsb29yX2FyZWEsIDAuMDEpLCBmbG9vcl9hcmVhIDwgcXVhbnRpbGUoZmxvb3JfYXJlYSwgMC45OSkpCmBgYAoKYGBge3J9CnBsb3QodHJhaW4kZmxvb3JfYXJlYSwgdHJhaW4kcHJpY2UpCmBgYAoKTGV0J3MgYnVpbGQgYSBzaW1wbGUgbGluZWFyIG1vZGVsIHdpdGggZmxvb3JfYXJlYSwgbnVtX3Jvb21zLCBjYXRlZ29yeSArIGlzX2J1c2luZXNzCgpgYGB7cn0KbTEgPC0gbG0oZm9ybXVsYSA9IHByaWNlIH4gZmxvb3JfYXJlYSArIHJvb21zX251bSArIGNhdGVnb3J5ICsgaXNfYnVzaW5lc3MsIAogICAgICAgICBkYXRhID0gdHJhaW4pCgpzdW1tYXJ5KG0xKQpgYGAKClBsb3QxIAoKJCQKKHlfaSAtIFxoYXR7eV9pfSwgXGhhdHt5fV9pKQokJAoKYGBge3J9CnBsb3QobTEpCmBgYAoKCk5vdywgd2Ugd2lsbCBjYWxjdWxhdGUgcm9idXN0IG1vZGVsIHdpdGggTUFTUzo6cmxtCgpgYGB7cn0KbTIgPC0gTUFTUzo6cmxtKGZvcm11bGEgPSBwcmljZSB+IGZsb29yX2FyZWEgKyByb29tc19udW0gKyBjYXRlZ29yeSArIGlzX2J1c2luZXNzLCAKICAgICAgICAgICAgICAgIGRhdGEgPSB0cmFpbiwgeS5yZXQgPSBUUlVFKQpzdW1tYXJ5KG0xKQpzdW1tYXJ5KG0yKQpgYGAKCmBgYHtyfQpzdW1tYXJ5KG0yJHcpCnN1bShtMiR3IDwgMSkgLyBOUk9XKG0yJHcpCmBgYAoKYGBge3J9CnBsb3QobTIkeSwgbTIkdywgeGxhYiA9ICJQcmljZSIsIHlsYWIgPSAiSHViZXIgd2VpZ2h0IiwgCiAgICAgbWFpbiA9ICJEaXN0cmlidXRpb24gb2YgcHJpY2VzIGFuZCBIdWJlciB3ZWlnaHRzIikKCnBsb3QobTIkeFssImZsb29yX2FyZWEiXSwgbTIkdywgeGxhYiA9ICJGbG9vciBhcmVhIiwgeWxhYiA9ICJIdWJlciB3ZWlnaHQiLCAKICAgICBtYWluID0gIkRpc3RyaWJ1dGlvbiBvZiBmbG9vciBhcmVhIGFuZCBIdWJlciB3ZWlnaHRzIikKYGBgCgpgYGB7cn0KbTMgPC0gTUFTUzo6cmxtKGZvcm11bGEgPSBwcmljZSB+IGZsb29yX2FyZWEgKyByb29tc19udW0gKyBjYXRlZ29yeSArIGlzX2J1c2luZXNzLCAKICAgICAgICAgICAgICAgIGRhdGEgPSB0cmFpbiwgeS5yZXQgPSBUUlVFLCBwc2kgPSBwc2kuYmlzcXVhcmUpCnN1bW1hcnkobTMpCmBgYAoKCmBgYHtyfQpoaXN0KG0zJHcsIGJyZWFrcyA9ICJmZCIpCnN1bShtMyR3KSAvIE5ST1cobTMkdykKYGBgCgpgYGB7cn0KcGxvdChtMyR5LCBtMyR3LCB4bGFiID0gIlByaWNlIiwgeWxhYiA9ICJIdWJlciB3ZWlnaHQiLCAKICAgICBtYWluID0gIkRpc3RyaWJ1dGlvbiBvZiBwcmljZXMgYW5kIEh1YmVyIHdlaWdodHMiKQoKcGxvdChtMyR4WywiZmxvb3JfYXJlYSJdLCBtMyR3LCB4bGFiID0gIkZsb29yIGFyZWEiLCB5bGFiID0gIkh1YmVyIHdlaWdodCIsIAogICAgIG1haW4gPSAiRGlzdHJpYnV0aW9uIG9mIGZsb29yIGFyZWEgYW5kIEh1YmVyIHdlaWdodHMiKQpgYGAKCmBgYHtyfQpwbG90KG0yJHcsIG0zJHcsIHhsYWIgPSAiSHViZXIgd2VpZ2h0cyIsIHlsYWIgPSAiQmlzcXVhcmUgd2VpZ2h0cyIpCmBgYAoK